home *** CD-ROM | disk | FTP | other *** search
/ Celestin Apprentice 2 / Apprentice-Release2.iso / Source Code / C / Snippets / EMBL Search / Sources / search.c < prev    next >
Encoding:
C/C++ Source or Header  |  1992-05-04  |  13.9 KB  |  568 lines  |  [TEXT/KAHL]

  1. /*
  2. *********************************************************************
  3. *    
  4. *    Search.c
  5. *    Lookup of records in target and hit file
  6. *        
  7. *    Rainer Fuchs
  8. *    EMBL Data Library
  9. *    Postfach 10.2209
  10. *    D-6900 Heidelberg, FRG
  11. *    E-mail: fuchs@embl-heidelberg.de
  12. *
  13. *    Copyright © 1992 EMBL Data Library
  14. *        
  15. **********************************************************************
  16. *    
  17. */ 
  18.  
  19. #include <stdio.h>
  20. #include <string.h>
  21.  
  22. #include "EMBL-Search.h"
  23. #include "EMBL-Search.rsrc.h"
  24.  
  25. /*
  26. ******************************* Prototypes ***************************
  27. */
  28.  
  29. #include "search.h"
  30. #include "pstr.h"
  31. #include "util.h"
  32. #include "bsearch.h"
  33. #include "hitstorage.h"
  34.  
  35. static Boolean MarkHits(short fd, StringPtr fName, TargetRec *myTrgRec, u_long nhits, HitmapHdl hitmapHdl);
  36.  
  37.  
  38.  
  39.  
  40. /*
  41. ********************************* Global variables *********************
  42. */
  43.  
  44. extern DBInfo        gDBInfo[DB_NUM];
  45. extern WDRec        gWindows[MAXWIN];
  46. extern short        gResNo;
  47. extern IndexFiles    gFileList;
  48. extern char            gError[256];
  49.  
  50. char AndOperator[] = " AND ";
  51. char OrOperator[] = " OR ";
  52. char NotOperator[] = "NOT ";
  53.  
  54.  
  55.  
  56. HitmapHdl DoSearch(short what, StringPtr key, short dbcode)
  57. {
  58.     HitmapHdl    masterHitmapHdl,hitmapHdl;
  59.     Boolean        ret;
  60.     StringPtr    targetFName,hitFName;
  61.     u_long        target_nrec;
  62.     u_short        target_recsize;
  63.     short            maxkeylen;
  64.     char            str[256],*posOr,*posAnd,*posStop,*token;
  65.     Boolean        done;
  66.     Boolean        bNot;
  67.     short            nextOp,prevOp;
  68.     long            offset;
  69.  
  70.     pstrcpy((StringPtr)str,(StringPtr)key);
  71.     PtoCstr((StringPtr)str);
  72.     str2upper(rtrim(str));
  73.     
  74.     switch(what) {
  75.         case ENAME_QRY:
  76.             break;
  77.         case ACCNUM_QRY:
  78.             targetFName     = gFileList.acnumTrgFName;
  79.             hitFName            = gFileList.acnumHitFName;
  80.             target_nrec     = gDBInfo[dbcode].actrg_nrec;
  81.             target_recsize = gDBInfo[dbcode].actrg_recsize;
  82.             maxkeylen        = gDBInfo[dbcode].actrg_recsize-2*sizeof(long);
  83.             break;
  84.         case KEYWORD_QRY:
  85.             targetFName     = gFileList.keywTrgFName;
  86.             hitFName            = gFileList.keywHitFName;
  87.             target_nrec     = gDBInfo[dbcode].kwtrg_nrec;
  88.             target_recsize = gDBInfo[dbcode].kwtrg_recsize;
  89.             maxkeylen        = gDBInfo[dbcode].kwtrg_recsize-2*sizeof(long);
  90.             break;
  91.         case FREETEXT_QRY:
  92.             targetFName     = gFileList.textTrgFName;
  93.             hitFName            = gFileList.textHitFName;
  94.             target_nrec     = gDBInfo[dbcode].texttrg_nrec;
  95.             target_recsize = gDBInfo[dbcode].texttrg_recsize;
  96.             maxkeylen        = gDBInfo[dbcode].texttrg_recsize-2*sizeof(long);
  97.             break;
  98.         case AUTHOR_QRY:
  99.             targetFName     = gFileList.authorTrgFName;
  100.             hitFName            = gFileList.authorHitFName;
  101.             target_nrec     = gDBInfo[dbcode].authortrg_nrec;
  102.             target_recsize = gDBInfo[dbcode].authortrg_recsize;
  103.             maxkeylen        = gDBInfo[dbcode].authortrg_recsize-2*sizeof(long);
  104.             break;
  105.         case TAXON_QRY:
  106.             targetFName     = gFileList.taxonTrgFName;
  107.             hitFName            = gFileList.taxonHitFName;
  108.             target_nrec     = gDBInfo[dbcode].taxontrg_nrec;
  109.             target_recsize = gDBInfo[dbcode].taxontrg_recsize;
  110.             maxkeylen        = gDBInfo[dbcode].taxontrg_recsize-2*sizeof(long);
  111.             break;
  112.     }
  113.             
  114.     masterHitmapHdl = NULL;
  115.     token = str;
  116.     done = FALSE;
  117.     nextOp = prevOp = BOOLEAN_NOOP;
  118.     while(!done && *token != EOS) {
  119.         bNot = FALSE;
  120.         posOr = posAnd = NULL;
  121.             
  122.         posOr = strstr(token,OrOperator);
  123.         posAnd = strstr(token,AndOperator);
  124.         if(posOr != posAnd) {
  125.             if(posOr == NULL)
  126.                 nextOp = BOOLEAN_AND;
  127.             else if(posAnd == NULL)
  128.                 nextOp = BOOLEAN_OR;
  129.             else if(posAnd < posOr)
  130.                 nextOp = BOOLEAN_AND;
  131.             else
  132.                 nextOp = BOOLEAN_OR;
  133.         
  134.             if(nextOp == BOOLEAN_AND) posStop = posAnd;
  135.             else posStop = posOr;
  136.  
  137.             *posStop = EOS;
  138.         }
  139.         else nextOp = BOOLEAN_NOOP;
  140.                         
  141.         /* allocate new hitmap */
  142.         if( !NewHitmap(&hitmapHdl,dbcode) ) {
  143.             ErrorMsg(ERR_MEMORY);
  144.             return(masterHitmapHdl);
  145.         }
  146.         if (masterHitmapHdl == NULL )                /* make first hitmap master hitmap */
  147.             masterHitmapHdl = hitmapHdl;
  148.             
  149.         ltrim(token);
  150.         rtrim(token);
  151.         
  152.         if(!strncmp(token,NotOperator,strlen(NotOperator))) {
  153.             bNot = TRUE;
  154.             token += strlen(NotOperator);
  155.             ltrim(token);
  156.         }
  157.         
  158.         /* search for key, populate hitmap with hits */
  159.         if (what == ENAME_QRY)
  160.             ret=FindEntryname(token,dbcode,hitmapHdl);
  161.         else
  162.             ret=FindHits(token,dbcode,hitmapHdl,
  163.                         target_recsize,targetFName,target_nrec,
  164.                         general_compare,hitFName,maxkeylen);
  165.                         
  166.         if(!ret) { /* not found or error */
  167.             DisposHandle((Handle)hitmapHdl);
  168.             if(masterHitmapHdl == hitmapHdl)    /* first query */
  169.                 masterHitmapHdl = NULL;
  170.             else if(prevOp == BOOLEAN_AND) {    /* destroy previous results if
  171.                                                        previous operator was AND and
  172.                                                        nothing was found in new query */
  173.                 DisposHandle((Handle)masterHitmapHdl);
  174.                 masterHitmapHdl = NULL;
  175.             }
  176.         }
  177.         else {
  178.             if(bNot)
  179.                 NotHitmap(hitmapHdl);
  180.                 
  181.             if(prevOp != BOOLEAN_NOOP) {
  182.                 if(prevOp == BOOLEAN_OR)
  183.                     OrHitmaps(masterHitmapHdl,hitmapHdl);
  184.                 else
  185.                     AndHitmaps(masterHitmapHdl,hitmapHdl);
  186.                     
  187.                 DisposHandle((Handle)hitmapHdl);
  188.             }
  189.         }
  190.         
  191.         if(nextOp == BOOLEAN_NOOP)
  192.             done = TRUE;
  193.         else {
  194.             if(nextOp == BOOLEAN_AND)
  195.                 offset = (long)strlen(AndOperator);
  196.             else
  197.                 offset = (long)strlen(OrOperator);
  198.                 
  199.             token = posStop + offset;
  200.             
  201.             if(masterHitmapHdl != NULL)
  202.                 prevOp = nextOp;
  203.             else prevOp = BOOLEAN_NOOP;    /* in case an AND operation failed, we
  204.                                             reset prevOp */
  205.         }
  206.     }
  207.  
  208.     return(masterHitmapHdl);
  209. }
  210.  
  211.  
  212. /**************************************
  213. *    Search index for key.
  214. *    We perform a binary search on CD.
  215. *    Return value:    TRUE, if successful
  216. *                        FALSE, if an error occurred
  217. */
  218.  
  219. Boolean FindHits(char *key, short dbcode, HitmapHdl hitmapHdl,
  220.                       u_short target_recsize,StringPtr targetFName,u_long target_nrec,
  221.                       short (*compare)(void *,void *),
  222.                       StringPtr hitFName, short maxkeylen )
  223. {
  224.     char                mykey[256];
  225.     TargetRec        *myTrgRec;
  226.     u_long            pos,oldpos,n_hits;
  227.     long                count;
  228.     short                i;
  229.     short                fd1,fd2;
  230.     OSErr                err;
  231.     Str255            fName1,fName2;
  232.     Boolean            bWildCardSearch,bDone;
  233.     
  234.     /* we pad search terms to maxlen with blanks, so we can do a direct comparison to
  235.         records we read from index. If we didn't do so we could eg find a hit of
  236.         "ebv" with something like "ebv123" if we did a strncmp(..,strlen(key)) or we
  237.         would have to rtrim all records read from index in order to do a strcmp()
  238.         (trailing blanks!)
  239.         
  240.         For wildcard searches we can now get rid of the trailing blanks
  241.     */
  242.  
  243.     strcpy(mykey,key);    /* • we can speed this up • */
  244.     if(mykey[strlen(mykey)-1] == WILDCARD1) {
  245.         bWildCardSearch = TRUE;
  246.         mykey[strlen(mykey)-1] = EOS;
  247.     }
  248.     else {
  249.         bWildCardSearch = FALSE;
  250.         rpad(mykey,' ',maxkeylen);
  251.     }
  252.     
  253.     /* allocate memory for target records */
  254.     if ( (myTrgRec=(TargetRec *)NewPtrClear((Size)target_recsize)) == NULL)
  255.         return(ErrorMsg(ERR_MEMORY));
  256.  
  257.     /* Open index  */
  258.     pstrcpy(fName1,targetFName);
  259.     if( OpenMacFileReadOnly(fName1,gDBInfo[dbcode].InxWDRefNum,&fd1,TRUE) != noErr ) {
  260.         DisposPtr((Ptr)myTrgRec);
  261.         return(FALSE);
  262.     }
  263.  
  264.     /* do the binary search */
  265.     if( !CDIndex_BSearch(str2upper(mykey),
  266.                                 fName1,
  267.                                 fd1,
  268.                                 (long)target_recsize,
  269.                                 target_nrec,
  270.                                 compare,
  271.                                 myTrgRec, &pos) ) {
  272.         DisposPtr((Ptr)myTrgRec);
  273.         FSClose(fd1);
  274.         return(FALSE);
  275.     }
  276.     n_hits = ConvertLong(&myTrgRec->nhits);
  277.     
  278.     RotateWaitCursor();
  279.     
  280.     /* Open hit file */
  281.     pstrcpy(fName2,hitFName);
  282.     if ( OpenMacFileReadOnly(fName2,gDBInfo[dbcode].InxWDRefNum,&fd2,TRUE) != noErr ) {
  283.         FSClose(fd1);
  284.         DisposPtr((Ptr)myTrgRec);
  285.         return(FALSE);
  286.     }
  287.     
  288.     /* Mark hits in hitmap */
  289.     if( !MarkHits(fd2,fName2,myTrgRec,n_hits,hitmapHdl) ) {
  290.         FSClose(fd1);
  291.         FSClose(fd2);
  292.         DisposPtr((Ptr)myTrgRec);
  293.         return(FALSE);
  294.     }
  295.     
  296.     /* Wildcard search:
  297.         After we found a matching record during the binary search, we step backwards
  298.         as long as we find matching records. Then we do the same in the other direction.
  299.         This strategy is primitive, of course, and should be improved.
  300.     */
  301.     
  302.     if( bWildCardSearch ) {
  303.         oldpos = pos;                /* store current position */
  304.         
  305.         /* Now go backwards in index, looking for matches */
  306.         bDone = FALSE;
  307.         while( !bDone && pos > 0 ) {
  308.             --pos;
  309.             if( (err=SetFPos(fd1,fsFromStart,
  310.                         pos*(long)target_recsize+sizeof(Header))) != noErr) {
  311.                 sprintf(gError,LoadErrorStr(ERR_READFILE,FALSE),
  312.                         PtoCstr(fName1),err );
  313.                 CtoPstr((char *)fName1);
  314.                 FSClose(fd1);
  315.                 FSClose(fd2);
  316.                 DisposPtr((Ptr)myTrgRec);
  317.                 return(ErrorMsg(0));
  318.             }
  319.  
  320.             count = (long)target_recsize;
  321.             if( ReadMacFile(fd1,&count,(void *)myTrgRec,fName1,TRUE) ) {
  322.                 FSClose(fd1);
  323.                 FSClose(fd2);
  324.                 DisposPtr((Ptr)myTrgRec);
  325.                 return(FALSE);
  326.             }
  327.                 
  328.             if(compare(mykey,myTrgRec))
  329.                 bDone = TRUE;
  330.             else {
  331.                 n_hits = ConvertLong(&myTrgRec->nhits);
  332.  
  333.                 if( !MarkHits(fd2,fName2,myTrgRec,n_hits,hitmapHdl) ) {
  334.                     FSClose(fd1);
  335.                     FSClose(fd2);
  336.                     DisposPtr((Ptr)myTrgRec);
  337.                     return(FALSE);
  338.                 }
  339.             }
  340.         }
  341.         
  342.         pos = oldpos;                /* jump to starting position */
  343.         
  344.         /* and go forwards in index, looking for matches */
  345.         bDone = FALSE;
  346.         while (!bDone && pos < target_nrec - 1) {
  347.             ++pos;
  348.             if( (err=SetFPos(fd1,fsFromStart,
  349.                         pos*(long)target_recsize+sizeof(Header))) != noErr) {
  350.                 sprintf(gError,LoadErrorStr(ERR_READFILE,FALSE),
  351.                         PtoCstr(fName1),err );
  352.                 CtoPstr((char *)fName1);
  353.                 FSClose(fd1);
  354.                 FSClose(fd2);
  355.                 DisposPtr((Ptr)myTrgRec);
  356.                 return(ErrorMsg(0));
  357.             }
  358.  
  359.             count = (long)target_recsize;
  360.             if( ReadMacFile(fd1,&count,myTrgRec,fName1,TRUE) ) {
  361.                 FSClose(fd1);
  362.                 FSClose(fd2);
  363.                 DisposPtr((Ptr)myTrgRec);
  364.                 return(FALSE);
  365.             }
  366.                 
  367.             if(compare(mykey,myTrgRec))
  368.                 bDone = TRUE;
  369.             else {
  370.                 n_hits = ConvertLong(&myTrgRec->nhits);
  371.  
  372.                 if( !MarkHits(fd2,fName2,myTrgRec,n_hits,hitmapHdl) ) {
  373.                     FSClose(fd1);
  374.                     FSClose(fd2);
  375.                     DisposPtr((Ptr)myTrgRec);
  376.                     return(FALSE);
  377.                 }
  378.             }
  379.         } while ( !bDone );
  380.     }
  381.     
  382.     FSClose(fd1);
  383.     FSClose(fd2);
  384.     DisposPtr((Ptr)myTrgRec);
  385.     return(TRUE);
  386. }
  387.  
  388. /**************************************
  389. *    Find entryname in index.
  390. *    We perform a binary search on CD.
  391. *    Return value:    TRUE, if successful
  392. *                        FALSE, if an error occurred
  393. */
  394.  
  395. Boolean FindEntryname(char *key, short dbcode, HitmapHdl hitmapHdl)
  396. {
  397.     char        entryName[ENTRYNAMELEN+1];
  398.     u_long    pos,oldpos;
  399.     Boolean    ret;
  400.     EnameRec    rec;
  401.     Boolean    bWildCardSearch,bDone;
  402.     short        fd;
  403.     Str255    indexName;
  404.     long        count;
  405.     OSErr        err;
  406.     
  407.     /* we pad entrynames to maxlen with blanks, so we can do a direct comparison to
  408.         records we read from index. If we didn't do so we could eg find a hit of
  409.         "ebv" with something like "ebv123" if we did a strncmp(..,strlen(key)) or we
  410.         would have to rtrim all records read from index to in order to do a strcmp()
  411.         (trailing blanks!)
  412.     
  413.     For wildcard searches we don't need the trailing blanks
  414.     */
  415.     
  416.     strncpy(entryName,key,ENTRYNAMELEN);    /* • we can speed this up • */
  417.     entryName[ENTRYNAMELEN] = EOS;
  418.     if(entryName[strlen(entryName)-1] == WILDCARD1) {
  419.         bWildCardSearch = TRUE;
  420.         entryName[strlen(entryName)-1] = EOS;
  421.     }
  422.     else {
  423.         bWildCardSearch = FALSE;
  424.         rpad(entryName,' ',ENTRYNAMELEN);
  425.     }
  426.     
  427.     /* Open index  */
  428.     pstrcpy(indexName,gFileList.enameIdxFName);
  429.     if( OpenMacFileReadOnly(indexName,gDBInfo[dbcode].InxWDRefNum,&fd,TRUE) != noErr )
  430.         return(FALSE);
  431.  
  432.     /* do the binary search */
  433.     if( !CDIndex_BSearch(str2upper(entryName),
  434.                                 indexName,
  435.                                 fd,
  436.                                 sizeof(EnameRec),
  437.                                 gDBInfo[dbcode].ename_nrec,
  438.                                 ename_compare,
  439.                                 &rec, &pos) ) {
  440.         FSClose(fd);
  441.         return(FALSE);
  442.     }
  443.     
  444.     /* Mark hit in hitmap */
  445.     BitSet((Ptr)*hitmapHdl,pos);
  446.     
  447.     /* Wildcard search:
  448.         After we found a matching record during the binary search, we step backwards
  449.         as long as we find matching records. Then we do the same in the other direction
  450.     */
  451.  
  452.     if( bWildCardSearch ) {
  453.         oldpos = pos;                /* store current position */
  454.         
  455.         /* Now go backwards in index, looking for matches */
  456.         bDone = FALSE;
  457.         while(!bDone && pos > 0) {
  458.             --pos;
  459.             if( (err=SetFPos(fd,fsFromStart,
  460.                         pos*sizeof(EnameRec)+sizeof(Header))) != noErr) {
  461.                 sprintf(gError,LoadErrorStr(ERR_READFILE,FALSE),
  462.                         PtoCstr(indexName),err );
  463.                 CtoPstr((char *)indexName);
  464.                 FSClose(fd);
  465.                 return(ErrorMsg(0));
  466.             }
  467.  
  468.             count = sizeof(EnameRec);
  469.             if( ReadMacFile(fd,&count,&rec,indexName,TRUE) ) {
  470.                 FSClose(fd);
  471.                 return(FALSE);
  472.             }
  473.                 
  474.             if(ename_compare(entryName,&rec))
  475.                 bDone = TRUE;
  476.             else
  477.                 BitSet((Ptr)*hitmapHdl,pos);
  478.         }
  479.         
  480.         pos = oldpos;                /* jump to starting position */
  481.         
  482.         /* and go forwards in index, looking for matches */
  483.         bDone = FALSE;
  484.         while(!bDone && pos < gDBInfo[dbcode].ename_nrec - 1) {
  485.             ++pos;
  486.             if( (err=SetFPos(fd,fsFromStart,
  487.                         pos*sizeof(EnameRec)+sizeof(Header))) != noErr) {
  488.                 sprintf(gError,LoadErrorStr(ERR_READFILE,FALSE),
  489.                         PtoCstr(indexName),err );
  490.                 CtoPstr((char *)indexName);
  491.                 FSClose(fd);
  492.                 return(ErrorMsg(0));
  493.             }
  494.  
  495.             count = sizeof(EnameRec);
  496.             if( ReadMacFile(fd,&count,&rec,indexName,TRUE) ) {
  497.                 FSClose(fd);
  498.                 return(FALSE);
  499.             }
  500.                 
  501.             if(ename_compare(entryName,&rec))
  502.                 bDone = TRUE;
  503.             else
  504.                 BitSet((Ptr)*hitmapHdl,pos);
  505.         }
  506.     }
  507.     
  508.     FSClose(fd);
  509.     return(TRUE);
  510. }
  511.     
  512.  
  513. /**************************************
  514. *    Read hit file and mark hits in hitmap
  515. *    Return value:    TRUE, if successful
  516. *                        FALSE, if an error occurred
  517. */
  518.  
  519. static Boolean MarkHits(short fd, StringPtr fName, TargetRec *myTrgRec, u_long nhits,
  520.                         HitmapHdl hitmapHdl)
  521. {
  522.     u_long    hitpos;
  523.     OSErr        err;
  524.     long        i;
  525.     u_long    count,pos;
  526.     
  527.     /* Go to first hit in hit file */
  528.     hitpos = ConvertLong(&myTrgRec->hitPtr) - 1;
  529.     if( (err=SetFPos(fd,fsFromStart,
  530.                 hitpos*sizeof(u_long) + sizeof(Header))) != noErr) {
  531.         sprintf(gError,LoadErrorStr(ERR_READFILE,FALSE),
  532.                         PtoCstr(fName),err );
  533.         return(ErrorMsg(0));
  534.     }
  535.  
  536.     /* Now read "nhits" hits from there */
  537.     for(i=0,count= (u_long) sizeof(u_long);i < nhits; ++i) {
  538.         if( ReadMacFile(fd,(long *)&count,&pos,fName,TRUE) ) 
  539.             return(FALSE);
  540.  
  541.         /* Convert hit record to C-type array index */
  542.         ConvertLong(&pos);
  543.         BitSet((Ptr)*hitmapHdl,--pos);
  544.     }
  545.     
  546.     return(TRUE);
  547. }
  548.  
  549.  
  550.  
  551. /**************************************
  552. *    Compare routine for entryname bsearch
  553. */
  554.  
  555. short ename_compare(void *key,void *rec)
  556. {
  557.     return( strncmp((char *)key,((EnameRec *)rec)->entry_name,strlen((char *)key)) );
  558. }
  559.  
  560.  
  561. /**************************************
  562. *    Compare routine for other bsearches
  563. */
  564.  
  565. short general_compare(void *key,void *rec)
  566. {
  567.     return( strncmp((char *)key,((TargetRec *)rec)->value,strlen((char *)key)));
  568. }